* Set the maximum matrix size once, before any estimation command is issued.
set matsize 11000

*****************************************************************************
* Table 1. Sample characteristics by treatment group
*****************************************************************************
use MSIE_AERI.dta, clear

* Variables summarized in the table, in the order they are listed.
local t1_vars female white black asian hisp_w hisp_nw other age test_s test_ms

* One block of summary statistics per (us, t) cell.
bysort us t: summarize `t1_vars'

*****************************************************************************
* Table 2. Effects of incentives on test scores, by country
*****************************************************************************
use MSIE_AERI.dta, clear

* Covariate lists shared by the estimates and their randomization tests.
* NOTE: these are local macros, so this block has to be executed in one go
* (whole do-file or whole selection), not line by line.
local us_fe   us_school1_reg us_school1_hon us_school2_reg us_school2_hon
local us_demo female black asian hisp_w hisp_nw other age agemissing
local sh_fe   sh_school2 sh_school3 sh_school4 t2018
local sh_demo age female

* Options common to every randomization test in columns 1-4.
local rt_opts strata(school) groupvar(group) treatvars(t) seed(28920) reps(10000)

*** US (us==1)
* column 1: school-track indicators only
reg score t `us_fe' if us==1, cluster(group)
* randomization test for column 1
randcmd((t) reg score t `us_fe' if us==1, cluster(group)), `rt_opts'

* column 2: adds demographic controls
reg score t `us_fe' `us_demo' if us==1, cluster(group)
* randomization test for column 2
randcmd((t) reg score t `us_fe' `us_demo' if us==1, cluster(group)), `rt_opts'


*** Shanghai (us==0)
* column 3: school indicators and t2018 only
reg score t `sh_fe' if us==0, cluster(group)
* randomization test for column 3
randcmd((t) reg score t `sh_fe' if us==0, cluster(group)), `rt_opts'

* column 4: adds age and female
reg score t `sh_fe' `sh_demo' if us==0, cluster(group)
* randomization test for column 4
randcmd((t) reg score t `sh_fe' `sh_demo' if us==0, cluster(group)), `rt_opts'

*** US-shanghai difference
use MSIE_AERI.dta, clear

* Controls for the pooled model. The list is defined once so that the reported
* regression and its randomization test are guaranteed to use the same
* specification (the test previously left out agemissing, so its p-value
* referred to a different model than the one reported in column 5).
* NOTE: local macro - run this block as a unit.
local pooled_x us_school1_reg us_school1_hon us_school2_reg us_school2_hon sh_school2 sh_school3 sh_school4 t2018 female black asian hisp_w hisp_nw other age agemissing

* column 5: us_t is the coefficient of interest
reg score t us us_t `pooled_x', cluster(group)
* randomization test for column 5
* calc1() rebuilds the interaction us_t from each re-drawn t before the
* regression is re-estimated.
randcmd((us_t) reg score t us us_t `pooled_x', cluster(group)), strata(school) groupvar(group) treatvars(t) calc1(replace us_t=t*us) seed(28920) reps(10000)


*****************************************************************************
* Table 3 Treatment effects on questions attempted and questions correct
*****************************************************************************
* The _qlevel file loaded here is used by all three panels of this table.
use MSIE_AERI_qlevel.dta, clear

***** Panel A: percent missing
* Outcome is qa. NOTE(review): Panel B restricts to qa==1 and calls that
* "answered", so qa presumably flags attempted questions - confirm the header.

* Covariate lists (local macros - run this block as a unit).
local us_fe  us_school1_reg us_school1_hon us_school2_reg us_school2_hon
* Column 1 writes the Hispanic-white control as hisp_w, columns 2-3 as
* hisp_white; both spellings are kept exactly as they were.
* NOTE(review): presumably hisp_w is an abbreviation of hisp_white - verify.
local us_x1  i.question age agemissing female black asian hisp_w hisp_nw other
local us_x   i.question age agemissing female black asian hisp_white hisp_nw other
local sh_x   sh_school2 sh_school3 sh_school4 t2018 i.question age female

* Options shared by the randomization tests and by the wyoung calls.
local rt_opts strata(school) groupvar(group) treatvars(t) seed(28920) reps(10000)
local wy_opts cluster(group) familyp(t) bootstraps(10000) seed(28920)

*** US
* column 1: all questions
reg qa t `us_fe' `us_x1' if us==1, cluster(group)
* randomization test for column 1
randcmd((t) reg qa t `us_fe' `us_x1' if us==1, cluster(group)), `rt_opts'

* column 2: questions 1-13
reg qa t `us_fe' `us_x' if us==1 & question<=13, cluster(group)

* column 3: questions 14 and above
reg qa t `us_fe' `us_x' if us==1 & question>=14, cluster(group)

* MHT adjustment for columns 2-3
wyoung, cmd("reg qa t `us_fe' `us_x' if us==1 & question<=13, cluster(group)" ///
	"reg qa t `us_fe' `us_x' if us==1 & question>=14, cluster(group)") ///
	`wy_opts'


*** Shanghai
* column 4: all questions
reg qa t `sh_x' if us==0, cluster(group)
* randomization test for column 4 (this is the only test run through xi:)
randcmd((t) xi: reg qa t `sh_x' if us==0, cluster(group)), `rt_opts'

* column 5: questions 1-13
reg qa t `sh_x' if us==0 & question<=13, cluster(group)

* column 6: questions 14 and above
reg qa t `sh_x' if us==0 & question>=14, cluster(group)

* MHT adjustment for columns 5-6
wyoung, cmd("reg qa t `sh_x' if us==0 & question<=13, cluster(group) " ///
	"reg qa t `sh_x' if us==0 & question>=14, cluster(group) ") ///
	`wy_opts'



***** Panel B: percent correct of answered
* Outcome is qc; every model in this panel keeps only rows with qa==1.

* Covariate lists (local macros - run this block as a unit).
local us_rhs t us_school1_reg us_school1_hon us_school2_reg us_school2_hon i.question age agemissing female black asian hisp_white hisp_nw other
local sh_rhs t sh_school2 sh_school3 sh_school4 t2018 i.question age female

* Options shared by the randomization tests and by the wyoung calls.
local rt_opts strata(school) groupvar(group) treatvars(t) seed(28920) reps(10000)
local wy_opts cluster(group) familyp(t) bootstraps(10000) seed(28920)

*** US
* column 1: all questions
reg qc `us_rhs' if us==1 & qa==1, cluster(group)
* randomization test for column 1
randcmd((t) reg qc `us_rhs' if us==1 & qa==1, cluster(group)), `rt_opts'

* column 2: questions 1-13
reg qc `us_rhs' if us==1 & question<=13 & qa==1, cluster(group)

* column 3: questions 14 and above
reg qc `us_rhs' if us==1 & question>=14 & qa==1, cluster(group)

* MHT adjustment for columns 2-3
wyoung, cmd("reg qc `us_rhs' if us==1 & question<=13 & qa==1, cluster(group)" ///
	"reg qc `us_rhs' if us==1 & question>=14 & qa==1, cluster(group)") ///
	`wy_opts'

*** Shanghai
* column 4: all questions
reg qc `sh_rhs' if us==0 & qa==1, cluster(group)
* randomization test for column 4
randcmd((t) reg qc `sh_rhs' if us==0 & qa==1, cluster(group) ), `rt_opts'

* column 5: questions 1-13
reg qc `sh_rhs' if us==0 & question<=13 & qa==1, cluster(group)

* column 6: questions 14 and above
reg qc `sh_rhs' if us==0 & question>=14 & qa==1, cluster(group)

* MHT adjustment for columns 5-6
wyoung, cmd("reg qc `sh_rhs' if us==0 & question<=13 & qa==1, cluster(group)  " ///
	"reg qc `sh_rhs' if us==0 & question>=14 & qa==1, cluster(group) ") ///
	`wy_opts'

***** Panel C percent correct
* Outcome is qc with no restriction on qa (contrast with Panel B).

* Covariate lists (local macros - run this block as a unit).
local us_rhs t us_school1_reg us_school1_hon us_school2_reg us_school2_hon i.question age agemissing female black asian hisp_white hisp_nw other
local sh_rhs t sh_school2 sh_school3 sh_school4 t2018 i.question age female

* Options shared by the randomization tests and by the wyoung calls.
local rt_opts strata(school) groupvar(group) treatvars(t) seed(28920) reps(10000)
local wy_opts cluster(group) familyp(t) bootstraps(10000) seed(28920)

*** US
* column 1: all questions
reg qc `us_rhs' if us==1, cluster(group)
* randomization test for column 1
randcmd((t) reg qc `us_rhs' if us==1, cluster(group)), `rt_opts'

* column 2: questions 1-13
reg qc `us_rhs' if us==1 & question<=13, cluster(group)

* column 3: questions 14 and above
reg qc `us_rhs' if us==1 & question>=14, cluster(group)

* MHT adjustment for columns 2-3
wyoung, cmd("reg qc `us_rhs' if us==1 & question<=13, cluster(group)" ///
	"reg qc `us_rhs' if us==1 & question>=14, cluster(group)") ///
	`wy_opts'

*** Shanghai
* column 4: all questions
reg qc `sh_rhs' if us==0, cluster(group)
* randomization test for column 4
randcmd((t) reg qc `sh_rhs' if us==0, cluster(group)), `rt_opts'

* column 5: questions 1-13
reg qc `sh_rhs' if us==0 & question<=13, cluster(group)

* column 6: questions 14 and above
reg qc `sh_rhs' if us==0 & question>=14, cluster(group)

* MHT adjustment for columns 5-6
wyoung, cmd("reg qc `sh_rhs' if us==0 & question<=13, cluster(group)" ///
	"reg qc `sh_rhs' if us==0 & question>=14, cluster(group)") ///
	`wy_opts'



*****************************************************************************
* Table A1. Sensitivity of U.S. treatment effect to sample changes
*****************************************************************************
* Covariate lists (local macros - run this block as a unit; they survive the
* repeated -use- commands below because -use- does not clear macros).
local us_fe   us_school1_reg us_school1_hon us_school2_reg us_school2_hon
local us_race female black asian hisp_w hisp_nw other
local rt_opts strata(school) groupvar(group) treatvars(t) seed(28920) reps(10000)

*** main sample
use MSIE_AERI.dta, clear

* column 1
reg score t `us_fe' `us_race' age agemissing if us==1, cluster(group)
* randomization test for column 1
randcmd((t) reg score t `us_fe' `us_race' age agemissing if us==1, cluster(group)), `rt_opts'


*** Exclude if missing age, US
use MSIE_AERI.dta, clear
drop if agemissing==1

* column 2: agemissing is left out of the model for this sample
reg score t `us_fe' `us_race' age if us==1, cluster(group)
* randomization test for column 2
randcmd((t) reg score t `us_fe' `us_race' age if us==1, cluster(group)), `rt_opts'


*** Include non-10th graders, US
* The ellnon10 file is used without an us==1 filter.
use MSIE_AERI_ellnon10.dta, clear
drop if ell==1

* column 3: adds the non10 indicator
reg score t `us_fe' `us_race' age agemissing non10, cluster(group)
* randomization test for column 3
randcmd((t) reg score t `us_fe' `us_race' age agemissing non10, cluster(group)), `rt_opts'

*** Include ELL, US
use MSIE_AERI_ellnon10.dta, clear
drop if grade~=10

* column 4: adds the ell indicator
reg score t `us_fe' `us_race' age agemissing ell, cluster(group)
* randomization test for column 4
randcmd((t) reg score t `us_fe' `us_race' age agemissing ell, cluster(group)), `rt_opts'

*** replace school-track FE with baseline test score
use MSIE_AERI.dta, clear

* column 5: test_s replaces the school-track indicators
* (covariates are listed in a different order here than in the test below)
reg score t test_s female black asian other hisp_w hisp_nw age agemissing if us==1, cluster(group)
* randomization test for column 5
randcmd((t) reg score t test_s `us_race' age agemissing if us==1, cluster(group)), `rt_opts'


***********************************************************************************
* Table A.2. Treatment effects by predicted test score: Threshold regressions, U.S.
***********************************************************************************
use MSIE_AERI.dta, clear


***** threshold regressions to determine threshold numbers and cutoffs (not in paper)
* Each call is restricted to us==1, uses scorehat as the threshold variable and
* lets the listed regionvars coefficients differ across regions; optthresh(4)
* is passed so that up to 4 thresholds are considered.

** score threshold
threshold score if us==1, regionvars(t female black asian other hisp_w hisp_nw age agemissing) threshvar(scorehat) optthresh(4) 

* define resulting threshold variable
* The cutoff is the value transcribed from the threshold output above.
* Stata ranks missing values above every number, so an unguarded
* "scorehat > c" would code observations with missing scorehat as 1 and put
* them in the above-threshold sample. The if-qualifier leaves the indicator
* missing for them, so they enter neither the ==0 nor the ==1 regression.
gen above_s=scorehat> 11.041977 if !missing(scorehat)


** questions attempted threshold (no threshold break identified)
threshold qa if us==1, regionvars(t female black asian other hisp_w hisp_nw age agemissing) threshvar(scorehat) optthresh(4)


** proportion of attempted questions correct
threshold qc_pcta_pct if us==1, regionvars(t female black asian other hisp_w hisp_nw age agemissing) threshvar(scorehat) optthresh(4)

* define resulting threshold variable
* Same missing-value guard as for above_s.
gen above_qc=scorehat> 11.001752 if !missing(scorehat)



***** Table A.2 regressions
* No school-track indicators are included in any model of this table.

*** Score
* column 1: at or below the score cutoff
reg score t female black asian hisp_w hisp_nw other age agemissing if us==1 & above_s==0, cluster(group)

* column 2: above the score cutoff
reg score t female black asian hisp_w hisp_nw other age agemissing if us==1 & above_s==1, cluster(group)

* MHT adjustment for columns 1-2
wyoung, /// 
cmd("reg score t female black asian hisp_w hisp_nw other age agemissing if us==1 & above_s==0, cluster(group)" ///
	"reg score t female black asian hisp_w hisp_nw other age agemissing if us==1 & above_s==1, cluster(group)") ///
cluster(group) familyp(t) bootstraps(10000) seed(28920) 

*** Attempted
* column 3: full U.S. sample, since no break was identified for qa
reg qa t female black asian hisp_w hisp_nw other age agemissing if us==1, cluster(group) 
* randomization test for column 3
randcmd((t) reg qa t female black asian hisp_w hisp_nw other age agemissing if us==1, cluster(group)), strata(school) groupvar(group) treatvars(t) seed(28920) reps(10000) 

*** Proportion Correct
* column 4: at or below the proportion-correct cutoff
reg qc_pcta_pct t female black asian hisp_w hisp_nw other age agemissing if us==1 & above_qc==0, cluster(group) 
* column 5: above the proportion-correct cutoff
reg qc_pcta_pct t female black asian hisp_w hisp_nw other age agemissing if us==1 & above_qc==1, cluster(group) 

* MHT adjustment for columns 4-5
wyoung, /// 
cmd("reg qc_pcta_pct t female black asian hisp_w hisp_nw other age agemissing if us==1 & above_qc==0, cluster(group)" ///
	"reg qc_pcta_pct t female black asian hisp_w hisp_nw other age agemissing if us==1 & above_qc==1, cluster(group)") ///
cluster(group) familyp(t) bootstraps(10000) seed(28920) 


*****************************************************************************
* Table A.3. Effect of incentives on test scores, by gender
*****************************************************************************
use MSIE_AERI.dta, clear

* Right-hand sides per country; female is the split variable, so it is not a
* control here. (Local macros - run this block as a unit.)
local us_rhs t us_school1_reg us_school1_hon us_school2_reg us_school2_hon age agemissing black asian hisp_w hisp_nw other
local sh_rhs t sh_school2 sh_school3 sh_school4 t2018 age

* Options shared by both wyoung calls.
local wy_opts cluster(group) familyp(t) bootstraps(10000) seed(28920)

*** US
* column 1: female==0
reg score `us_rhs' if us==1 & female==0, cluster(group)

* column 2: female==1
reg score `us_rhs' if us==1 & female==1, cluster(group)

* MHT adjustment for columns 1-2
wyoung, cmd("reg score `us_rhs' if us==1 & female==0, cluster(group)" ///
	"reg score `us_rhs' if us==1 & female==1, cluster(group)") ///
	`wy_opts'

*** Shanghai
* column 3: female==0
reg score `sh_rhs' if us==0 & female==0,  cluster(group)
* column 4: female==1
reg score `sh_rhs' if us==0 & female==1,  cluster(group)

* MHT adjustment for columns 3-4
wyoung, cmd("reg score `sh_rhs' if us==0 & female==0,  cluster(group)" ///
	"reg score `sh_rhs' if us==0 & female==1,  cluster(group) ") ///
	`wy_opts'
